library(car)
library(mosaic)
library(DT)
library(tidyverse)
Made by Lance, Logan, and Daniel. Let’s take a look.
files <- dir()
rdat <- read.csv(grep("RBdata.csv", files, value=TRUE), header=TRUE)
as.tibble(rdat)
# A tibble: 100 x 11
Y X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
<dbl> <dbl> <int> <dbl> <int> <int> <dbl> <dbl> <int> <dbl> <dbl>
1 4.55 35.4 0 3.93 0 0 7.56 2.29 0 27.2 3.29
2 1.52 89.3 0 0.244 0 1 3.70 2.51 0 28.7 10.5
3 -3.43 40.8 1 3.89 0 0 9.07 1.81 0 44.8 3.54
4 11.2 87.5 0 1.07 1 0 9.99 2.24 0 44.3 32.1
5 -1.29 15.7 1 2.99 0 0 5.94 2.58 1 48.7 13.0
6 2.88 16.6 1 4.63 0 0 1.23 1.20 0 85.4 30.6
7 -6.58 43.0 0 0.940 0 1 3.55 0.886 1 32.9 32.5
8 26.4 12.2 1 4.46 1 0 0.761 1.05 1 94.7 17.6
9 -22.2 129. 1 1.07 0 2 2.70 0.277 1 81.3 13.2
10 -16.2 60.9 1 3.73 0 2 7.97 1.23 1 32.2 28.0
# ... with 90 more rows
pairs(rdat, pch=16, cex=1, panel=panel.smooth, col.smooth="skyblue4", col=rgb(.2,.2,.2,.8))
lm1 <- lm(Y ~ X4, data=rdat)
summary(lm1)
##
## Call:
## lm(formula = Y ~ X4, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.137 -5.492 0.243 5.403 24.846
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.319 1.436 -0.919 0.36
## X4 19.583 2.011 9.740 4.44e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.05 on 98 degrees of freedom
## Multiple R-squared: 0.4919, Adjusted R-squared: 0.4867
## F-statistic: 94.86 on 1 and 98 DF, p-value: 4.445e-16
palette(c("skyblue","orange"))
pairs(cbind(R=lm1$res, fit=lm1$fit, rdat), pch=16, cex=1, panel=panel.smooth, col.smooth="skyblue4", col=factor(rdat$X4))
lm2 <- lm(Y ~ X4 + X7, data=rdat)
summary(lm2)
##
## Call:
## lm(formula = Y ~ X4 + X7, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.146 -4.714 -0.500 5.928 20.371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.946 2.181 2.268 0.025562 *
## X4 19.536 1.894 10.313 < 2e-16 ***
## X7 -4.153 1.134 -3.662 0.000407 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.469 on 97 degrees of freedom
## Multiple R-squared: 0.5536, Adjusted R-squared: 0.5444
## F-statistic: 60.15 on 2 and 97 DF, p-value: < 2.2e-16
palette(c("skyblue","orange"))
pairs(cbind(R=lm2$res, fit=lm2$fit, rdat), pch=16, cex=1, panel=panel.smooth, col.smooth="skyblue4", col=factor(rdat$X4))
lm3 <- lm(Y ~ X4 + X7 + X4:X7, data=rdat)
summary(lm3)
##
## Call:
## lm(formula = Y ~ X4 + X7 + X4:X7, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.8575 -5.4744 -0.2018 4.6318 25.1179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2725 2.6121 -0.104 0.91714
## X4 30.2422 3.7158 8.139 1.44e-12 ***
## X7 -0.6939 1.5059 -0.461 0.64600
## X4:X7 -7.1242 2.1613 -3.296 0.00137 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.022 on 96 degrees of freedom
## Multiple R-squared: 0.599, Adjusted R-squared: 0.5865
## F-statistic: 47.8 on 3 and 96 DF, p-value: < 2.2e-16
palette(c("skyblue","orange"))
pairs(cbind(R=lm3$res, fit=lm3$fit, rdat), pch=16, cex=1, panel=panel.smooth, col.smooth="skyblue4", col=factor(rdat$X4))
lm4 <- lm(Y ~ X4 + X4:X7, data=rdat)
summary(lm4)
##
## Call:
## lm(formula = Y ~ X4 + X4:X7, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.8575 -5.6645 -0.0355 4.6318 25.1179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.319 1.284 -1.028 0.307
## X4 31.289 2.928 10.685 < 2e-16 ***
## X4:X7 -7.818 1.544 -5.063 1.96e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.985 on 97 degrees of freedom
## Multiple R-squared: 0.5981, Adjusted R-squared: 0.5898
## F-statistic: 72.18 on 2 and 97 DF, p-value: < 2.2e-16
palette(c("skyblue","orange"))
pairs(cbind(R=lm4$res, fit=lm4$fit, rdat), pch=16, cex=1, panel=panel.smooth, col.smooth="skyblue4", col=factor(rdat$X8))
lm5 <- lm(Y ~ X4 + X8 + X4:X7 + X7:X8, data=rdat)
summary(lm5)
##
## Call:
## lm(formula = Y ~ X4 + X8 + X4:X7 + X7:X8, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.579 -4.968 0.867 4.505 21.326
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.592 1.176 -2.203 0.03 *
## X4 37.918 2.741 13.833 < 2e-16 ***
## X8 -14.378 3.388 -4.243 5.12e-05 ***
## X4:X7 -11.905 1.476 -8.067 2.18e-12 ***
## X8:X7 10.929 1.828 5.978 3.94e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.625 on 95 degrees of freedom
## Multiple R-squared: 0.7165, Adjusted R-squared: 0.7046
## F-statistic: 60.03 on 4 and 95 DF, p-value: < 2.2e-16
palette(c("skyblue","orange","firebrick","green"))
pairs(cbind(R=lm5$res, fit=lm5$fit, rdat), pch=16, cex=1, panel=panel.smooth, col.smooth="skyblue4", col=interaction(factor(rdat$X4),factor(rdat$X8)))
There seems to be two more lines within each color…
palette(c("skyblue","orange","firebrick","green"))
par(mfrow=c(2,1))
plot(Y ~ X7, data=rdat, col=interaction(X4,X8), pch=16, cex=2)
plot(lm5, which=1, col=interaction(rdat$X4,rdat$X8), pch=16, cex=2)
lm6 <- lm(Y ~ X7*X4*X8*X2, data=rdat)
summary(lm6)
##
## Call:
## lm(formula = Y ~ X7 * X4 * X8 * X2, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1123 -1.6950 -0.3323 1.5209 8.7835
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3565 1.3817 -0.258 0.797044
## X7 1.1105 0.8316 1.335 0.185349
## X4 26.0763 2.0539 12.696 < 2e-16 ***
## X8 -14.8849 2.9224 -5.093 2.13e-06 ***
## X2 10.4574 1.9297 5.419 5.62e-07 ***
## X7:X4 -7.9249 1.2300 -6.443 6.98e-09 ***
## X7:X8 9.1779 1.6122 5.693 1.79e-07 ***
## X4:X8 7.3003 4.0129 1.819 0.072443 .
## X7:X2 -9.2267 1.1673 -7.904 9.43e-12 ***
## X4:X2 9.6268 2.9044 3.315 0.001356 **
## X8:X2 -20.6577 4.2790 -4.828 6.12e-06 ***
## X7:X4:X8 -4.6715 2.3834 -1.960 0.053306 .
## X7:X4:X2 -4.9587 1.7343 -2.859 0.005354 **
## X7:X8:X2 7.7992 2.2884 3.408 0.001006 **
## X4:X8:X2 7.0978 5.6363 1.259 0.211412
## X7:X4:X8:X2 11.0406 3.1925 3.458 0.000856 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.956 on 84 degrees of freedom
## Multiple R-squared: 0.9623, Adjusted R-squared: 0.9556
## F-statistic: 143 on 15 and 84 DF, p-value: < 2.2e-16
lm7 <- lm(Y ~ X4 + X8 + X2 + X7:X4 + X7:X8 + X7:X2 + X4:X2 + X8:X2 + X4:X8 + X4:X8:X7 + X7:X4:X2 + X7:X4:X8:X2, data=rdat)
summary(lm7)
##
## Call:
## lm(formula = Y ~ X4 + X8 + X2 + X7:X4 + X7:X8 + X7:X2 + X4:X2 +
## X8:X2 + X4:X8 + X4:X8:X7 + X7:X4:X2 + X7:X4:X8:X2, data = rdat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.2350 -2.1023 -0.1049 1.7914 10.1187
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0174 0.7703 1.321 0.190051
## X4 25.4852 1.6350 15.588 < 2e-16 ***
## X8 -20.4474 2.3233 -8.801 1.15e-13 ***
## X2 7.7289 1.5448 5.003 2.91e-06 ***
## X4:X7 -7.2293 0.9078 -7.964 5.87e-12 ***
## X8:X7 13.1312 1.1141 11.786 < 2e-16 ***
## X2:X7 -7.0674 0.7998 -8.837 9.70e-14 ***
## X4:X2 10.7584 2.4050 4.473 2.32e-05 ***
## X8:X2 -8.9983 1.8203 -4.943 3.70e-06 ***
## X4:X8 10.2995 2.8913 3.562 0.000599 ***
## X4:X8:X7 -7.2172 1.9058 -3.787 0.000280 ***
## X4:X2:X7 -6.2882 1.4672 -4.286 4.69e-05 ***
## X4:X8:X2:X7 16.4394 1.5154 10.849 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.131 on 87 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9502
## F-statistic: 158.4 on 12 and 87 DF, p-value: < 2.2e-16
I loved your data. Great challenge. Satisfying result!
palette(c("skyblue","skyblue4","orange","orangered", "firebrick", "firebrick4", "green", "limegreen"))
plot(Y ~ X7, data=rdat, pch=16, col=interaction(factor(rdat$X2),factor(rdat$X4),factor(rdat$X8)))
b <- coef(lm7)
abline(b[1], 0, col=palette()[1]) #X4=0, X8=0, X2=0
abline(b[1]+b[2], b[5], col=palette()[3]) #X4=1, X8=0, X2=0
abline(b[1]+b[3], b[6], col=palette()[5]) #X4=0, X8=1, X2=0
abline(b[1]+b[4], b[7], col=palette()[2]) #X4=0, X8=0, X2=1
abline(b[1]+b[2]+b[3]+b[10], b[5]+b[6]+b[11], col=palette()[7]) #X4=1, X8=1, X2=0
abline(b[1]+b[2]+b[4]+b[8], b[5]+b[7]+b[12], col=palette()[4]) #X4=1, X8=0, X2=1
abline(b[1]+b[3]+b[4]+b[9], b[6]+b[7], col=palette()[6]) #X4=0, X8=1, X2=1
abline(b[1]+b[2]+b[3]+b[4]+b[8]+b[9]+b[10], b[5]+b[6]+b[7]+b[11]+b[12]+b[13], col=palette()[8]) #X4=1, X8=1, X2=1